library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## Warning: package 'tibble' was built under R version 4.0.3
## Warning: package 'tidyr' was built under R version 4.0.3
## Warning: package 'readr' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(RColorBrewer)
library(cowplot)
library(gt)
## Warning: package 'gt' was built under R version 4.0.3
theme_set(theme_minimal())
National:
Personal:
Where is green space deprivation experienced?
Who is living in green space deprivation?
The relationship between green space deprivation and health?
Ideas:
data_sources <- tribble(
~file_name, ~name, ~notes, ~url,
"(FOE) Green Space Consolidated Data - England - Version 2.1.xlsx", "green_space", "...", "https://friendsoftheearth.uk/nature/green-space-consolidated-data-england",
"Local_Authority_District_to_Region__December_2019__Lookup_in_England.csv", "LAD_to_region", "used the December 2019 version", "https://geoportal.statistics.gov.uk/datasets/3ba3daf9278f47daba0f561889c3521a_0"
)
data_sources
Local Authority District to Region lookup (December 2019 used) - https://geoportal.statistics.gov.uk/datasets/3ba3daf9278f47daba0f561889c3521a_0
green_space <- read_excel("../Data/(FOE) Green Space Consolidated Data - England - Version 2.1.xlsx",
sheet = "MSOAs V2.1")
## New names:
## * `` -> ...1
green_space
# FOE green space data doesn't include a variable for the region each MSOA/local authority lies within
# So, import a look up table from the ONS, which maps between Local Authority Districts and Regions
LAD_to_region <- read_csv("../Data/Local_Authority_District_to_Region__December_2019__Lookup_in_England.csv") %>%
select(-FID, -LAD19NM)
##
## -- Column specification --------------------------------------------------------
## cols(
## FID = col_double(),
## LAD19CD = col_character(),
## LAD19NM = col_character(),
## RGN19CD = col_character(),
## RGN19NM = col_character()
## )
LAD_to_region
# add region names and codes (from ONS look up) to FOE green space data
green_space <- green_space %>%
left_join(LAD_to_region, by = c("LA_Code" = "LAD19CD")) %>%
relocate(RGN19CD, RGN19NM, .before = Area) %>%
rename(region_code = RGN19CD,
region_name = RGN19NM)
green_space
title_lab <- "Numbers of neighbourhoods in England by green space\ndeprivation rating\n"
subtitle_lab <- "E is highest level of green space deprivation\n\nA is lowest level of green space deprivation"
x_lab <- "Green space deprivation rating\n\n"
y_lab <- "\nNumber of Neighbourhoods\n"
caption_lab <- "Source: Friends of the Earth, England's Green Space Gap"
msoa_by_rating <- green_space %>%
group_by(Green_Space_Deprivation_Rating) %>%
summarise(msoa_count = n()) %>%
add_column(bar_label = c("Large or very large gardens and large or \nvery large amounts of public green space",
"",
"",
"",
"Very small gardens and very small\namounts of public green space") # add labels to be used on the plot
) %>%
ungroup() %>%
mutate(prop_rating = msoa_count / sum(msoa_count))
## `summarise()` ungrouping output (override with `.groups` argument)
msoa_by_rating
p <- ggplot(data = msoa_by_rating,
mapping = aes(x = Green_Space_Deprivation_Rating, y = msoa_count)
)
p + geom_segment(mapping = aes(x = Green_Space_Deprivation_Rating,
xend = Green_Space_Deprivation_Rating,
y = 750,
yend = msoa_count),
colour = "grey50",
size = 1.5
) +
geom_point(mapping = aes(fill = Green_Space_Deprivation_Rating),
size = 7.5,
shape = 21,
colour = "grey50",
stroke = 1.25
) +
scale_fill_brewer(type = "div",
palette = "RdYlGn",
direction = -1) +
labs(title = title_lab,
subtitle = subtitle_lab,
x = x_lab,
y = y_lab,
caption = caption_lab) +
guides(fill = FALSE) +
# facet_wrap(~ Green_Space_Deprivation_Rating) +
coord_flip()
colour_palette <- scales::seq_gradient_pal(low = "palegreen4", high = "grey")
colour_scale <- colour_palette(seq(0, 1, length.out = 5))
#colour_scale <- c("springgreen4", "palegreen4", "darkseagreen3", "grey75", "grey60")
p + geom_bar(mapping = aes(fill = Green_Space_Deprivation_Rating),
stat = "identity") +
scale_fill_manual(values = colour_scale) +
geom_text(mapping = aes(label = bar_label), hjust = 1.1, colour = "white", size = 3.5) +
guides(fill = FALSE) +
labs(title = title_lab,
x = x_lab,
y = y_lab,
caption = caption_lab) +
theme(axis.text.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold", size = 15),
plot.title = element_text(face = "bold", size = 15)
) +
coord_flip()
Below I plot the proportions of the 6791 MSOAs analyzed given each green space deprivation rating.
p <- ggplot(data = msoa_by_rating,
mapping = aes(x = Green_Space_Deprivation_Rating, y = prop_rating)
)
p + geom_bar(mapping = aes(fill = Green_Space_Deprivation_Rating),
stat = "identity") +
scale_fill_manual(values = colour_scale) +
geom_text(mapping = aes(label = bar_label), hjust = 1.1, colour = "white", size = 3.5) +
guides(fill = FALSE) +
labs(title = "Proportion of neighbourhoods in England by green space\ndeprivation rating\n",
x = x_lab,
y = "Proportion of neighbourhoods",
caption = caption_lab) +
theme(axis.text.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold", size = 15),
plot.title = element_text(face = "bold", size = 15)
) +
coord_flip()
msoa_by_rating
The two plots below, respectively show for each region the numbers or proportion of MSOA with each green space deprivation rating. Key insights from the two plots include:
msoa_by_region_and_rating <- green_space %>%
# count by region and rating
group_by(region_name, Green_Space_Deprivation_Rating) %>%
summarise(msoa_count = n()) %>%
filter(!is.na(region_name)) %>%
ungroup() %>%
# calculate rating proportions by region
group_by(region_name) %>%
mutate(prop = msoa_count / sum(msoa_count)) %>%
ungroup()
## `summarise()` regrouping output by 'region_name' (override with `.groups` argument)
msoa_by_region_and_rating
plot_msoas_by_region <- function(y_var){
p <- ggplot(data = msoa_by_region_and_rating,
mapping = aes(x = Green_Space_Deprivation_Rating, y = .data[[y_var]])
)
p + geom_bar(mapping = aes(fill = Green_Space_Deprivation_Rating),
stat = "identity",
colour = "grey80") +
scale_fill_brewer(type = "div",
palette = "RdYlGn",
direction = -1) +
guides(fill = FALSE) +
coord_flip() +
facet_wrap(~ region_name, ncol = 3)
}
plot_msoas_by_region("msoa_count")
plot_msoas_by_region("prop")
Below I plot again plot the proportion of MSOAs within each region receiving each green space deprivation rating. This time faceting the plot by green space deprivation rating rather than region. This makes it easier to compare across regions at a given rating.
It would be easier to read if I produced separate plots for each green space deprivation rating, as then the regions could be put in rank order.
prop_a <- msoa_by_region_and_rating %>%
filter(Green_Space_Deprivation_Rating == "A") %>%
mutate(prop_a = prop) %>%
select(region_name, prop_a)
msoa_by_region_and_rating <- msoa_by_region_and_rating %>%
left_join(prop_a)
## Joining, by = "region_name"
msoa_by_region_and_rating
p <- ggplot(data = msoa_by_region_and_rating,
mapping = aes(x = reorder(region_name, prop_a), y = prop, fill = region_name)
)
p + geom_bar(stat = "identity") +
facet_wrap(~ Green_Space_Deprivation_Rating) +
guides(fill = FALSE) +
coord_flip()
Next I explored an alternative appraoch to considering the distribution of msoas by rating and region. I plotted the proportion of MSOAs that received a specific rating by region. In order to address the ordering issue above, I produced one plot for each green space rating. This meant addressing the challenge of how to ensure the colour associated with a given region was applied consistently across the five plots. Here is where I found the an approach to doing this, using scale_…_manual.
How to map a colour to a value of a categorical variable …
This approach help me identify some additional insights:
# The idea is to produce one bar chart for each green space rating, and across these plots use the same colour for a given region.
# choose a palette to work with
region_pal <- brewer.pal(9, "Paired")
# map the region names to a specific colour value (in a dataframe for ease)
region_colours_df <- msoa_by_region_and_rating %>%
select(region_name) %>%
distinct(region_name) %>%
add_column(colour = region_pal)
# convert the region-colour mapping from a dataframe to a vector
# because a vector is required by scale_fill_manual
region_colours_vector <- region_colours_df$colour
names(region_colours_vector) <- region_colours_df$region_name
region_colours_vector
## East Midlands East of England London
## "#A6CEE3" "#1F78B4" "#B2DF8A"
## North East North West South East
## "#33A02C" "#FB9A99" "#E31A1C"
## South West West Midlands Yorkshire and The Humber
## "#FDBF6F" "#FF7F00" "#CAB2D6"
plot_gsdr_prop_by_region <- function(rating = "A"){
msoa_by_region_and_rating %>%
# focus on the rating of interest
filter(Green_Space_Deprivation_Rating == rating) %>%
mutate(proportion = msoa_count / sum(msoa_count)) %>%
# create the bar chart
ggplot(mapping = aes(x = reorder(region_name, proportion),
y = proportion,
fill = region_name
)
) +
geom_col() +
# apply region-colour mapping
scale_fill_manual(values = region_colours_vector) +
guides(fill = FALSE) +
labs(title = str_c("MSOAs receiving green space deprivation rating: ", rating),
x = NULL,
y = str_c("Proportion of the MSOAs that recieved a rating of ", rating)
) +
coord_flip()
}
# produce one plot for each of the five green space deprivation ratings
gsdr_prop_by_region_plots <- c("A", "B", "C", "D", "E") %>%
map(~ plot_gsdr_prop_by_region(.x))
gsdr_prop_by_region_plots
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
d_plot <- gsdr_prop_by_region_plots[[4]] +
ylim(0, 0.45) # ensure the plot axis scales match
e_plot <- gsdr_prop_by_region_plots[[5]] +
ylim(0, 0.45) # ensure the plot axis scales match
plot_grid(d_plot, e_plot, ncol = 2)
Could do ridgeline plots for each region - for prop rated A and proportion rated E
https://datacarpentry.org/r-raster-vector-geospatial/06-vector-open-shapefile-in-r/
library(sf)
msoa_sf <- st_read("../Data/Middle_Layer_Super_Output_Areas/Middle_Layer_Super_Output_Areas__December_2011__Boundaries_EW_BFE.shp")
# msoa_sf <- raster::shapefile("../Data/Middle_Layer_Super_Output_Areas/Middle_Layer_Super_Output_Areas__December_2011__Boundaries_EW_BFE.shp")
msoa_sf_gs <- msoa_sf %>%
left_join(green_space, by = c("MSOA11CD" = "MSOA_Code"))
A quick visual inspection of the MSOAs colored by their green space deprivation rating, shows a similiar patter across the regions (with the exception of London). With the the D and E ratings (oranges and reds) occurring in smaller (presumably more densely populated MSOAs) which make up urban areas. While the larger, more rural MSOAs tend to be less green space deprived, and have A or B ratings. Given the whole region of London would probably be considered a continuous urban space, it is unsurprising to observe many green space deprived MSOAs across the region/plot, with relatively few less green space deprived areas present.
green_space_region_plot <- function(region){
p <- ggplot(data = msoa_sf_gs %>%
filter(region_name == region),
mapping = aes(fill = Green_Space_Deprivation_Rating)
)
p + geom_sf(colour = "grey75") +
scale_fill_brewer(type = "div",
palette = "RdYlGn",
direction = -1) +
labs(title = str_c("...", region))
}
region_names <- green_space %>%
select(region_name) %>%
distinct(region_name)
region_names %>%
filter(!is.na(region_name)) %>%
pmap(~ green_space_region_plot(region = .x))
green_space <- green_space %>%
mutate(prop_BAME_pop = BAME_Pop / Total_Pop_From_Ethnicity_Data) %>%
relocate(prop_BAME_pop, .after = BAME_Pop)
p <- ggplot(data = green_space,
mapping = aes(x = Green_Space_Deprivation_Rating,
y = prop_BAME_pop)
)
p + geom_jitter(alpha = 0.5, colour = "grey75") +
geom_boxplot(mapping = aes(fill = Green_Space_Deprivation_Rating),
alpha = 0.5,
size = 1.25,
colour = "grey25") +
scale_fill_brewer(type = "div",
palette = "RdYlGn",
direction = -1) +
guides(fill = FALSE) +
coord_flip()
library(ggridges)
p <- ggplot(data = green_space,
mapping = aes(y = Green_Space_Deprivation_Rating,
x = prop_BAME_pop,
fill = Green_Space_Deprivation_Rating)
)
p + geom_density_ridges(alpha = 0.6, scale = 1.5) +
scale_fill_brewer(type = "div",
palette = "RdYlGn",
direction = -1)
## Picking joint bandwidth of 0.0272